[IA64] Fix I&D cache incoherency after vcpu migration
author awilliam@xenbuild2.aw <awilliam@xenbuild2.aw>
Thu, 15 Feb 2007 17:25:33 +0000 (10:25 -0700)
committer awilliam@xenbuild2.aw <awilliam@xenbuild2.aw>
Thu, 15 Feb 2007 17:25:33 +0000 (10:25 -0700)
Windows on HVM occasionally crashes with a BSOD, especially at boot time.
I finally found out the cause is PAL_CACHE_FLUSH(cache_type=4).
cache_type is an argument of PAL_CACHE_FLUSH, and cache_type=4
makes the local instruction caches coherent with the data caches.
See SDM vol2 11.10.3, PAL_CACHE_FLUSH.
FYI, Linux never uses cache_type=4.

Currently PAL_CACHE_FLUSH is called only on the local cpu, so the caches
on the other cpus remain incoherent.

Attached patch does:
- When cache_type=1,2,3 (which flush caches on the local cpu),
  the caches on the other cpus are now flushed as well.
  This might be overkill and inefficient, but I think it's acceptable
  since these cache_types are seldom used.

- When cache_type=4, the actual PAL call on the other cpus is deferred
  until the vcpu migrates or the cpu becomes idle, because
  Windows uses cache_type=4 quite often and many vcpus in an SMP
  environment call PAL_CACHE_FLUSH simultaneously.

Signed-off-by: Kouya Shimura <kouya@jp.fujitsu.com>
xen/arch/ia64/xen/domain.c
xen/arch/ia64/xen/fw_emul.c
xen/include/asm-ia64/domain.h
xen/include/asm-ia64/linux-xen/asm/pal.h

index c786a9d6e88937fd2d97753f1eba28246ac9889b..97a844c17efac23f1eeea951ef35f6fef36d781d 100644 (file)
@@ -138,6 +138,28 @@ static void flush_vtlb_for_context_switch(struct vcpu* prev, struct vcpu* next)
        }
 }
 
+static void flush_cache_for_context_switch(struct vcpu *next)
+{      /* Run the deferred cache_type=4 PAL_CACHE_FLUSH on this cpu, if one is pending. */
+       extern cpumask_t cpu_cache_coherent_map; /* global map of cpus with a flush pending */
+       int cpu = smp_processor_id();
+       /* Candidates: 'next' is the idle vcpu, or 'next' has this cpu's bit set (cleared here). */
+       if (is_idle_vcpu(next) ||
+           __test_and_clear_bit(cpu, &next->arch.cache_coherent_map)) {
+               if (cpu_test_and_clear(cpu, cpu_cache_coherent_map)) { /* global bit consumed: one flush per mark */
+                       unsigned long flags;
+                       u64 progress = 0;
+                       s64 status;
+
+                       local_irq_save(flags); /* interrupts stay off across the PAL call */
+                       status = ia64_pal_cache_flush(4, 0, &progress, NULL);
+                       local_irq_restore(flags);
+                       if (status != 0) /* any non-zero PAL status is reported via panic_domain() */
+                               panic_domain(NULL, "PAL_CACHE_FLUSH ERROR, "
+                                            "cache_type=4 status %lx", status);
+               }
+       }
+}
+
 static void lazy_fp_switch(struct vcpu *prev, struct vcpu *next)
 {
        /*
@@ -260,6 +282,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     }
    
     flush_vtlb_for_context_switch(prev, current);
+    flush_cache_for_context_switch(current);
     context_saved(prev);
 }
 
index e96a2ad9024146eddae55886b2ea82274f98a3f3..1a2f5b5173a3100bfdce5e4cd01c619b5800e0b2 100644 (file)
@@ -379,6 +379,28 @@ sal_emulator (long index, unsigned long in1, unsigned long in2,
        return ((struct sal_ret_values) {status, r9, r10, r11});
 }
 
+cpumask_t cpu_cache_coherent_map; /* cpus that still owe a deferred cache_type=4 flush */
+
+struct cache_flush_args { /* argument block handed to remote_pal_cache_flush() */
+       u64 cache_type; /* forwarded as the first ia64_pal_cache_flush() argument */
+       u64 operation; /* forwarded as the second ia64_pal_cache_flush() argument */
+       u64 progress; /* initial progress value; copied by the callback, never written back */
+       long status; /* overwritten by the callback only with a non-zero PAL status */
+};
+
+static void
+remote_pal_cache_flush(void *v) /* smp_call_function() callback; v is a struct cache_flush_args * */
+{
+       struct cache_flush_args *args = v;
+       long status;
+       u64 progress = args->progress; /* private copy: PAL's progress updates stay local */
+
+       status = ia64_pal_cache_flush(args->cache_type, args->operation,
+                                     &progress, NULL);
+       if (status != 0) /* success leaves args->status untouched. NOTE(review): args appears shared by all cpus running this - confirm */
+               args->status = status;
+}
+
 struct ia64_pal_retval
 xen_pal_emulator(unsigned long index, u64 in1, u64 in2, u64 in3)
 {
@@ -542,9 +564,27 @@ xen_pal_emulator(unsigned long index, u64 in1, u64 in2, u64 in3)
                status = ia64_pal_register_info(in1, &r9, &r10);
                break;
            case PAL_CACHE_FLUSH:
+               if (in3 != 0) /* Initially progress_indicator must be 0 */
+                       panic_domain(NULL, "PAL_CACHE_FLUSH ERROR, "
+                                    "progress_indicator=%lx", in3);
+
                /* Always call Host Pal in int=0 */
                in2 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
 
+               if (in1 != PAL_CACHE_TYPE_COHERENT) {
+                       struct cache_flush_args args = {
+                               .cache_type = in1,
+                               .operation = in2,
+                               .progress = 0,
+                               .status = 0
+                       };
+                       smp_call_function(remote_pal_cache_flush,
+                                         (void *)&args, 1, 1);
+                       if (args.status != 0)
+                               panic_domain(NULL, "PAL_CACHE_FLUSH ERROR, "
+                                            "remote status %lx", args.status);
+               }
+
                /*
                 * Call Host PAL cache flush
                 * Clear psr.ic when call PAL_CACHE_FLUSH
@@ -556,6 +596,13 @@ xen_pal_emulator(unsigned long index, u64 in1, u64 in2, u64 in3)
                        panic_domain(NULL, "PAL_CACHE_FLUSH ERROR, "
                                     "status %lx", status);
 
+               if (in1 == PAL_CACHE_TYPE_COHERENT) {
+                       int cpu = current->processor;
+                       cpus_setall(current->arch.cache_coherent_map);
+                       cpu_clear(cpu, current->arch.cache_coherent_map);
+                       cpus_setall(cpu_cache_coherent_map);
+                       cpu_clear(cpu, cpu_cache_coherent_map);
+               }
                break;
            case PAL_PERF_MON_INFO:
                {
index 66f9caec2e3b885b82103242f1089892f18ebfe0..d15265d7d93beeb225015c52e4cdbc9d6a33d5d4 100644 (file)
@@ -201,6 +201,7 @@ struct arch_vcpu {
 #endif
 #define INVALID_PROCESSOR       INT_MAX
     int last_processor;
+    cpumask_t cache_coherent_map;
 };
 
 #include <asm/uaccess.h> /* for KERNEL_DS */
index 7aaaab33edcf6a6f7b6072bedb5e25dd1784cccf..71e6a4ed89edc75603a919321362646e0fc99086 100644 (file)
@@ -112,6 +112,9 @@ typedef u64                         pal_cache_type_t;
 #define PAL_CACHE_TYPE_INSTRUCTION     1       /* Instruction cache */
 #define PAL_CACHE_TYPE_DATA            2       /* Data or unified cache */
 #define PAL_CACHE_TYPE_INSTRUCTION_DATA        3       /* Both Data & Instruction */
+#ifdef XEN
+#define PAL_CACHE_TYPE_COHERENT                4       /* Make I&D-cache coherent */
+#endif
 
 
 #define PAL_CACHE_FLUSH_INVALIDATE     1       /* Invalidate clean lines */